In [8]:
import pandas as pd
import pyreadstat
import warnings
import numpy as np
from scipy.optimize import fsolve
import scipy.stats as si
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
warnings.filterwarnings("ignore")
In [9]:
def distance_default(df,col_name,v,sigmav):
    df[col_name] = (np.log(df[v]/df["f_lagged"])+(df["r"]-df[sigmav]**2/2))/df[sigmav]


def probability_default(df,col_name,dd_col_name):
    df[col_name] = si.norm.cdf(-df[dd_col_name], 0.0, 1.0)
    
def plot_chart(data,title_name,y1_name,economic_variable = False):
    x = data["fyear"].values
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    end = 2 if economic_variable else 1
    for i in range(len(data.columns)-end):
        fig.add_trace(go.Scatter(name=data.columns[i+1],x=x, y=data.iloc[:,i+1]),secondary_y=False)
    fig.update_yaxes(title_text="Different Methods "+str(y1_name), secondary_y=False)
    if(economic_variable):
        fig.add_trace(go.Scatter(name=data.columns[-1],x=x, y=data.iloc[:,-1]), secondary_y=True)
        fig.update_yaxes(title_text="Economic Indicator in %", secondary_y=True)      
    fig.update_layout(barmode='relative',showlegend = True, title_text="Line Chart for "+title_name)
    fig.show()

def equations(variables,*args) :
    (v,sigmav) = variables
    (f_lagged,r,e, sigmae) = args
    d1 = (np.log(v/f_lagged) +(r+0.5*sigmav**2))/sigmav 
    d2 = d1-sigmav
    eq1 = e - v*si.norm.cdf(d1, 0.0, 1.0) + np.exp(-r)*f_lagged*si.norm.cdf(d2, 0.0, 1.0)
    eq2 = sigmae - (v*si.norm.cdf(d1, 0.0, 1.0)*sigmav)/e
    return (eq1, eq2)

def equation_v(variables,*args) :
    (v) = variables
    (f_lagged,r,e,sigmav,time) = args
    d1 = (np.log(v/f_lagged) +(r+0.5*sigmav**2)*time)/(sigmav*np.sqrt(time)) 
    d2 = d1-sigmav*np.sqrt(time)
    eq1 = e - v*si.norm.cdf(d1, 0.0, 1.0) + np.exp(-r*time)*f_lagged*si.norm.cdf(d2, 0.0, 1.0)
    return (eq1)

def naive_method(df,lin_fac,mul_fac):
    sigma_d = df["sigmae"]*mul_fac + lin_fac
    df["v"+"_"+str(lin_fac)+"_"+str(mul_fac)] = df["e"] + df["f_lagged"]
    df["sigmav"+"_"+str(lin_fac)+"_"+str(mul_fac)] = (df["e"]*df["sigmae"]/df["v"+"_"+str(lin_fac)+"_"+str(mul_fac)]) + (df["f_lagged"]*sigma_d/df["v"+"_"+str(lin_fac)+"_"+str(mul_fac)])
    distance_default(df,"dd_naive"+"_"+str(lin_fac)+"_"+str(mul_fac),"v"+"_"+str(lin_fac)+"_"+str(mul_fac),"sigmav"+"_"+str(lin_fac)+"_"+str(mul_fac))

def solver_method(df):
    df["v_solver"] = 0
    df["sigmav_solver"] = 0
    df["dd_solver"] = 0
    for i in range(len(df)):
        if(df.loc[i,:].isnull().values.any()):
            continue
        f_lagged = df.loc[i,"f_lagged"]
        r = df.loc[i,"r"]
        e = df.loc[i,"e"]
        sigmae = df.loc[i,"sigmae"]
        initial = (f_lagged,r, e,sigmae)
        df.loc[i,["v_solver","sigmav_solver"]] = fsolve(equations,((e+f_lagged),0.10),args = initial)
    distance_default(df,"dd_solver","v_solver","sigmav_solver")
In [6]:
##Reading the interest rate data
daily_fed = pd.read_csv('/Users/global_minima/Downloads/DTB3.csv')
daily_fed.columns = daily_fed.columns.str.strip().str.lower()
daily_fed["date"] = pd.to_datetime(daily_fed["date"])
daily_fed["dtb3"] = pd.to_numeric(daily_fed["dtb3"], errors='coerce')
daily_fed = daily_fed.dropna(subset = ["date","dtb3"]).reset_index(drop = True)
daily_fed["fyear"] = daily_fed["date"].dt.year
daily_fed["r"] = np.log(1+daily_fed["dtb3"]/100.0) 
int_rates = daily_fed.groupby(by = "fyear")["r"].first()

use_cols = ["cusip","fyear","dlc","dltt","indfmt","datafmt","popsrc","fic","consol"]
funda_data= pd.read_csv("/Users/global_minima/funda.csv",usecols=use_cols)

funda_data_filtered = funda_data[(funda_data["indfmt"]=="INDL")&(funda_data["datafmt"]=="STD")&
           (funda_data["popsrc"]=="D")&(funda_data["fic"]=="USA")&
           (funda_data["consol"]=="C")&(funda_data["fyear"]>=1970)&
           (funda_data["fyear"]<=2020)]
del funda_data
funda_data_filtered['cusip'] = funda_data_filtered['cusip'].str[0:6]
funda_data_filtered["date"] =  pd.to_datetime(funda_data_filtered['fyear'],format = "%Y")
funda_data_filtered['fyear'] = funda_data_filtered["date"].dt.year
funda_data_filtered['dlc'] = funda_data_filtered['dlc']*1000000
funda_data_filtered['dltt'] = funda_data_filtered['dltt']*1000000
funda_data_filtered = funda_data_filtered.sort_values("fyear").reset_index(drop = True)
funda_data_filtered["f"] = funda_data_filtered["dlc"] + funda_data_filtered["dltt"]*0.5
funda_data_filtered["f_lagged"] = funda_data_filtered.groupby(by = ["cusip"])["f"].shift()

print(funda_data_filtered.head(10))
   fyear indfmt consol popsrc datafmt   cusip         dlc        dltt  fic  \
0   1970   INDL      C      D     STD  000032  12378000.0    917000.0  USA   
1   1970   INDL      C      D     STD  108055    289000.0    464000.0  USA   
2   1970   INDL      C      D     STD  597715   5609000.0  46870000.0  USA   
3   1970   INDL      C      D     STD  771010    635000.0   1042000.0  USA   
4   1970   INDL      C      D     STD  461097   1219000.0  71878000.0  USA   
5   1970   INDL      C      D     STD  46121H    210000.0    876000.0  USA   
6   1970   INDL      C      D     STD  349406         0.0         0.0  USA   
7   1970   INDL      C      D     STD     NaN    734000.0   6755000.0  USA   
8   1970   INDL      C      D     STD  461110   1991000.0  11881000.0  USA   
9   1970   INDL      C      D     STD  549577   3937000.0  68387000.0  USA   

        date           f  f_lagged  
0 1970-01-01  12836500.0       NaN  
1 1970-01-01    521000.0       NaN  
2 1970-01-01  29044000.0       NaN  
3 1970-01-01   1156000.0       NaN  
4 1970-01-01  37158000.0       NaN  
5 1970-01-01    648000.0       NaN  
6 1970-01-01         0.0       NaN  
7 1970-01-01   4111500.0       NaN  
8 1970-01-01   7931500.0       NaN  
9 1970-01-01  38130500.0       NaN  
In [ ]:
daily_data = pd.read_csv('/Users/global_minima/Downloads/daily_crsp.csv',dtype = {"CUSIP":"object"})
daily_data.columns = daily_data.columns.str.strip().str.lower()
daily_data = daily_data[daily_data["date"]>=19700101]
daily_data["date"] = pd.to_datetime(daily_data["date"],format = "%Y%m%d")
daily_data["fyear"] = daily_data["date"].dt.year
daily_data = daily_data.sort_values("date").reset_index(drop = True)
daily_data["cusip"] = daily_data['cusip'].str[0:6]
daily_data["e"] = abs(daily_data["prc"]*daily_data["shrout"]*1000)
daily_data = daily_data.dropna(subset=["prc"])
daily_data = daily_data.dropna(subset=["ret"])
daily_data["ret"] = pd.to_numeric(daily_data["ret"], errors='coerce')
print(daily_data.head(10))

Stock Data Cleaning

In [38]:
annret = pd.DataFrame(daily_data.groupby(by=['cusip','fyear']).apply(lambda x:
                                                       np.exp(np.sum(np.log(1+x['ret'])))))
annret = annret.rename(columns = {0:"cum_ret"})
annret = pd.DataFrame(annret.groupby(by=['cusip'])["cum_ret"].shift())
sigmae = pd.DataFrame(daily_data.groupby(by=['cusip','fyear'])['ret'].std()*np.sqrt(250))
sigmae = sigmae.rename(columns = {"ret":"sigmae"})
sigmae =  pd.DataFrame(sigmae.groupby(by = ["cusip"])["sigmae"].shift())
equity_value = pd.DataFrame(daily_data.groupby(by=['cusip','fyear'])['e'].first())
final = pd.merge(annret,sigmae, on = ["cusip","fyear"],how = "inner")
final = final.merge(funda_data_filtered[["cusip","fyear","f_lagged"]],on = ["cusip","fyear"],how = "inner")

final = final.merge(int_rates,on = "fyear",how = "inner")
final = final.merge(equity_value,on = ["cusip","fyear"],how = "inner")
final.sort_values("fyear").reset_index(drop  = True, inplace=True)
print(final.tail(10))
Started Cleaning the data and calculation of cumulative returns
         cusip  fyear   cum_ret    sigmae     f_lagged         r             e
269890  989428   1982  0.716865  0.567205          0.0  0.107867  4.794625e+07
269891  989484   1982  1.518552  0.226304    7547500.0  0.107867  9.359288e+07
269892  989506   1982  1.221794  0.320911   26662500.0  0.107867  2.828800e+07
269893  989569   1982  2.132589  0.529422    1298000.0  0.107867  2.272125e+07
269894  989701   1982  1.437301  0.170502  218363500.0  0.107867  1.574245e+08
269895  989755   1982  1.057120  0.301246   11110000.0  0.107867  1.463475e+07
269896  989824   1982  1.385791  0.422511   17989000.0  0.107867  1.441522e+08
269897  989875   1982       NaN       NaN          NaN  0.107867  5.188750e+06
269898  989905   1982  4.166662  1.391354      78500.0  0.107867  2.163543e+07
269899  989906   1982  0.604649  0.718821     387500.0  0.107867  5.470875e+06
Got the dataframe having interest rates attached

Sampling the firms from the CRSP Data for Method 1 and 2

In [10]:
print("Sampling the Data of 1000 firms from the original dataframe")
number_of_firms_sample = 1000
sub_sample = pd.DataFrame(final.groupby('fyear').apply(lambda d: d.sample(number_of_firms_sample))).reset_index(drop = True)
sub_sample = sub_sample.dropna().reset_index(drop = True)

#Method 1 (Naive)
naive_method(sub_sample,0.05,0.25)
naive_method(sub_sample,0.05,0.5)
naive_method(sub_sample,0,0.25)
probability_default(sub_sample,"pd_naive_0.05_0.25","dd_naive_0.05_0.25")
probability_default(sub_sample,"pd_naive_0.05_0.5","dd_naive_0.05_0.5")
probability_default(sub_sample,"pd_naive_0_0.25","dd_naive_0_0.25")

#Method 2 (Unknown find out sigmav and v)
solver_method(sub_sample)
probability_default(sub_sample,"pd_solver","dd_solver")
print("Completed the DD and PD for the Method 1 and 2")
Sampling the Data of 1000 firms from the original dataframe
Completed the DD and PD for the Method 1 and 2
In [10]:
## Method 3 solver

def iterative_method(df):
    i = 0
    df["v_iterate"] = 0
    df["sigmav_iterate"] = 0
    while (i <len(df)):
        if(df.loc[i,:].isnull().values.any()):
            i += 1
            continue
        start_year = df.loc[i,"fyear"]
        cusip_id = df.loc[i,"cusip"]
        r = df.loc[i,"r"]
        e = df.loc[i,"e"]
        error = 1000000
        time_steps = len(df[(df["fyear"]==start_year)&(df["cusip"]==cusip_id)])
        try:
            f_lagged = final_data.loc[(final_data["fyear"]==(start_year+1))&(final_data["cusip"]==cusip_id),"f_lagged"].iloc[0]
            sigmav = df.loc[(df["fyear"]==(start_year+1))&(df["cusip"]==cusip_id),"sigmav_solver"].iloc[0]
        except:
            i += time_steps
            continue
        if(sigmav == 0 or np.isnan(sigmav) or f_lagged== 0 or np.isnan(f_lagged)):
            i += time_steps
            continue
        tr_df = final_data.loc[(final_data["fyear"]==(start_year))&(final_data["cusip"]==cusip_id)].reset_index(drop = True)
        while(error > 0.01):
            value_company = []
            for j in range(time_steps):
                e = tr_df.loc[j,"e"]
                initial = (f_lagged,r,e,sigmav,1)
                value_company.append(fsolve(equation_v,(e+f_lagged),args = initial)[0])
            sigmav_n = (np.std(pd.DataFrame(value_company).pct_change())*np.sqrt(12)).iloc[0]
            error = abs(sigmav_n - sigmav)
            sigmav = sigmav_n
        initial = (f_lagged,r, e,sigmav,1)
        df.loc[(i+time_steps),["v_iterate","sigmav_iterate"]] = (fsolve(equation_v,(e+f_lagged),args = initial),sigmav_n)
        i += time_steps
    return df

Resample to Monthly Data from the CRSP Data for Method 3

In [11]:
print("Started Resampling the data to monthly level for the daily CRSP Data")
monthly_data = daily_data.groupby("cusip").resample('M', on='date').first().reset_index(drop =True)
cols_taken = ["cusip","fyear","cum_ret","sigmae","r","dd_naive_0.05_0.25"
              ,"dd_naive_0.05_0.5","dd_naive_0_0.25","pd_naive_0.05_0.25","pd_naive_0.05_0.5",
              "pd_naive_0_0.25","dd_solver","pd_solver","sigmav_solver"]
monthly_data = monthly_data.merge(funda_data_filtered[["cusip","fyear","f_lagged"]],on = ["cusip","fyear"],how = "left")
method_3_data = pd.merge(monthly_data[["date","fyear","cusip","e","f_lagged"]],sub_sample[cols_taken],on = ["cusip","fyear"],how = "inner")
method_3_data.sort_values("date").reset_index(drop  = True, inplace=True)
method_3_data = method_3_data[method_3_data.replace([np.inf, -np.inf], np.nan).notnull().all(axis=1)] 
k = pd.DataFrame(sub_sample.groupby("fyear")["cusip"].unique()).reset_index()
print("Completed the resampling method")
Started Resampling the data to monthly level for the daily CRSP Data
Completed the resampling method
In [30]:
sub_sample_1 = pd.DataFrame()
for i in range(len(k)):
    df1 = method_3_data[method_3_data["cusip"].isin(np.random.choice(k.loc[i,"cusip"],250)) & (method_3_data["fyear"]==k.loc[i,"fyear"])]
    sub_sample_1 = sub_sample_1.append(df1)
sub_sample_1 = sub_sample_1.loc[:,sub_sample_1.columns!="f_lagged"].reset_index(drop = True)
monthly_data = monthly_data.dropna().reset_index(drop = True)
final_data = pd.merge(sub_sample_1[sub_sample_1.columns[~sub_sample_1.columns.isin(["fyear","e","f_lagged"])]],monthly_data[["date","fyear","cusip","e","f_lagged"]],on = ["date","cusip"],how= "right")
final_data = final_data.sort_values(by = ["cusip","fyear"],ascending = [True, True]).reset_index(drop= True)
sub_sample_1 = sub_sample_1.sort_values(by = ["cusip","fyear"],ascending = [True, True]).reset_index(drop= True)
iterative_method(sub_sample_1) 
df2 = sub_sample_1[sub_sample_1.replace([np.inf, -np.inf], np.nan).notnull().all(axis=1)] 
df2 = df2.merge(final_data[["cusip","date","f_lagged"]],on = ["cusip","date"],how = "left")
distance_default(df2,"dd_iterate","v_iterate","sigmav_iterate")
probability_default(df2,"pd_iterate","dd_iterate")
df2 = df2[(df2["pd_iterate"]<0.75) & (df2["dd_iterate"]<40)].reset_index(drop = True)
print(df2.head(10))
print("Completed the Method 3 calculations")
Started Sub-sampling for Method 3 by taking 250 random stocks every year fromthe previous universe
        date   fyear   cusip             e   cum_ret    sigmae         r  \
0 1987-01-02  1987.0  000872  4.650469e+06  0.576920  0.784840  0.054015   
1 2014-01-02  2014.0  00123Q  6.910280e+09  0.776853  0.308657  0.000700   
2 2020-01-02  2020.0  00164V  1.725654e+09  0.719750  0.271208  0.014987   
3 2002-01-02  2002.0  004308  7.314489e+07  0.969527  0.364142  0.016955   
4 2002-01-02  2002.0  005094  3.624881e+08  0.368761  1.423261  0.016955   
5 2010-01-04  2010.0  00754E  1.809596e+07  1.014702  0.902873  0.000800   
6 2017-01-03  2017.0  00766T  5.696076e+09  1.210790  0.356442  0.005286   
7 1989-01-03  1989.0  009128  2.703525e+07  2.459989  0.815196  0.078256   
8 2008-01-02  2008.0  009158  2.072771e+10  1.428171  0.236203  0.031305   
9 2017-01-03  2017.0  00971T  1.170684e+10  1.266956  0.410009  0.005286   

   dd_naive_0.05_0.25  dd_naive_0.05_0.5  dd_naive_0_0.25  ...  \
0            2.585115           2.472381         2.614980  ...   
1            0.660946           0.389213         1.011912  ...   
2            3.554045           2.997042         4.102567  ...   
3            3.298766           2.886574         3.571963  ...   
4            0.403461           0.217307         0.432287  ...   
5            1.750879           1.621753         1.781057  ...   
6            4.152293           3.801297         4.375832  ...   
7            4.911090           4.889719         4.916356  ...   
8           10.509100          10.245470        10.742813  ...   
9            8.806648           8.744746         8.837142  ...   

   pd_naive_0.05_0.5  pd_naive_0_0.25  dd_solver     pd_solver  sigmav_solver  \
0       6.710821e-03     4.461629e-03   2.702511  3.440893e-03       0.688955   
1       3.485593e-01     1.557901e-01   3.398021  3.393757e-04       0.031068   
2       1.363065e-03     2.042954e-05   5.066775  2.023064e-07       0.137009   
3       1.947305e-03     1.771577e-04   4.130768  1.807767e-05       0.222360   
4       4.139847e-01     3.327664e-01   9.120093  3.752977e-20       0.100000   
5       5.242810e-02     3.745158e-02  16.818457  8.937683e-64       0.100000   
6       7.197028e-05     6.048504e-06   4.820174  7.171637e-07       0.256029   
7       5.049004e-07     4.408492e-07   4.930112  4.109123e-07       0.803989   
8       6.201024e-25     3.203124e-27  10.985337  2.247791e-28       0.214271   
9       1.117585e-18     4.909936e-19   8.898620  2.827259e-19       0.399155   

      v_iterate  sigmav_iterate      f_lagged  dd_iterate     pd_iterate  
0  6.112028e+06        0.955322  6.855000e+05    1.869065   3.080686e-02  
1  6.941002e+10        0.056337  6.180650e+10    2.043682   2.049249e-02  
2  3.327452e+09        0.164601  1.715722e+09    4.032863   2.755072e-05  
3  1.199040e+08        0.211512  4.743750e+07    4.358460   6.549055e-06  
4  4.291978e+08        1.535766  2.483245e+08   -0.400551   6.556245e-01  
5  1.974382e+07        0.666758  4.114500e+06    2.019983   2.169259e-02  
6  7.886149e+09        0.285712  2.245807e+09    4.271838   9.693407e-06  
7  2.322499e+07        0.874568  4.075000e+05    4.274983   9.557602e-06  
8  2.334891e+10        0.105623  2.188950e+09   22.654662  6.275176e-114  
9  1.132562e+10        0.406950  3.200435e+08    8.573156   5.034333e-18  

[10 rows x 21 columns]
Completed the Method 3 calculations

Descriptive Stats Calculation and Correlation Between Methods

In [31]:
print("Started Descriptive Stats for the Methods")
desc_stats_columns = ["dd_naive_0.05_0.25","dd_naive_0.05_0.5","dd_naive_0_0.25","pd_naive_0.05_0.25","pd_naive_0.05_0.5",
              "pd_naive_0_0.25","dd_solver","pd_solver","dd_iterate","pd_iterate"]

descriptive_stats = df2[desc_stats_columns].describe()
print(descriptive_stats)
##Correlation among all the methods 

columns_DD = [x for x in desc_stats_columns if x.startswith('dd_')]
correlation_table_DD = df2[columns_DD].corr()
columns_PD = [x for x in desc_stats_columns if x.startswith('pd_')]
correlation_table_PD = df2[columns_PD].corr()
print("Correlation between Methods for Distance to Default is: \n",correlation_table_DD)
print("Correlation between Methods for Probability of Default is: \n",correlation_table_PD)
Started Descriptive Stats for the Methods
       dd_naive_0.05_0.25  dd_naive_0.05_0.5  dd_naive_0_0.25  \
count          460.000000         460.000000       460.000000   
mean             5.659096           5.367073         5.928887   
std              4.688725           4.715007         4.771131   
min             -0.143710          -0.385551        -0.133072   
25%              2.367071           2.121457         2.506398   
50%              4.498766           4.242542         4.798163   
75%              7.940009           7.469785         8.273296   
max             30.318641          30.312584        30.974943   

       pd_naive_0.05_0.25  pd_naive_0.05_0.5  pd_naive_0_0.25   dd_solver  \
count        4.600000e+02       4.600000e+02     4.600000e+02  460.000000   
mean         3.359798e-02       4.745028e-02     2.942411e-02    6.702601   
std          8.898814e-02       1.146346e-01     8.242654e-02    4.854504   
min         3.255725e-202      3.912740e-202    5.863449e-211   -0.710656   
25%          1.017570e-15       4.802325e-14     6.522762e-17    3.339019   
50%          3.417812e-06       1.106397e-05     8.006392e-07    5.502384   
75%          8.965823e-03       1.694169e-02     6.098431e-03    8.831079   
max          5.571352e-01       6.500853e-01     5.529316e-01   31.350340   

           pd_solver  dd_iterate     pd_iterate  
count   4.600000e+02  460.000000   4.600000e+02  
mean    1.253878e-02    7.393782   2.159151e-02  
std     5.111182e-02    5.923366   7.816331e-02  
min    4.812904e-216   -0.400551  2.776288e-262  
25%     5.185016e-19    3.298874   2.106595e-22  
50%     1.873775e-08    5.762499   4.146650e-09  
75%     4.205397e-04    9.665895   4.854143e-04  
max     7.613511e-01   34.577253   6.556245e-01  
Correlation between Methods for Distance to Default is: 
                     dd_naive_0.05_0.25  dd_naive_0.05_0.5  dd_naive_0_0.25  \
dd_naive_0.05_0.25            1.000000           0.999014         0.996143   
dd_naive_0.05_0.5             0.999014           1.000000         0.991726   
dd_naive_0_0.25               0.996143           0.991726         1.000000   
dd_solver                     0.878456           0.874444         0.884616   
dd_iterate                    0.848340           0.846041         0.847628   

                    dd_solver  dd_iterate  
dd_naive_0.05_0.25   0.878456    0.848340  
dd_naive_0.05_0.5    0.874444    0.846041  
dd_naive_0_0.25      0.884616    0.847628  
dd_solver            1.000000    0.754376  
dd_iterate           0.754376    1.000000  
Correlation between Methods for Probability of Default is: 
                     pd_naive_0.05_0.25  pd_naive_0.05_0.5  pd_naive_0_0.25  \
pd_naive_0.05_0.25            1.000000           0.989735         0.994577   
pd_naive_0.05_0.5             0.989735           1.000000         0.971832   
pd_naive_0_0.25               0.994577           0.971832         1.000000   
pd_solver                     0.606664           0.615562         0.604820   
pd_iterate                    0.661711           0.657398         0.660975   

                    pd_solver  pd_iterate  
pd_naive_0.05_0.25   0.606664    0.661711  
pd_naive_0.05_0.5    0.615562    0.657398  
pd_naive_0_0.25      0.604820    0.660975  
pd_solver            1.000000    0.529765  
pd_iterate           0.529765    1.000000  

Plotting Description Stats Across Time

In [32]:
descriptive_stats_across_time = df2[["fyear"]+desc_stats_columns].groupby("fyear").describe()
descriptive_stats_across_time.columns = descriptive_stats_across_time.columns.map('{0[0]}_{0[1]}'.format)
descriptive_stats_across_time = descriptive_stats_across_time.reset_index()
plot_chart(descriptive_stats_across_time[["fyear"]+[s + "_mean" for s in columns_DD]],"Mean of Distance to Default (DD) Across Time","DD in Years")
plot_chart(descriptive_stats_across_time[["fyear"]+[s + "_mean" for s in columns_PD]],"Mean of Probability of (PD) Across Time","Probability of Default")
plot_chart(descriptive_stats_across_time[["fyear"]+[s + "_25%" for s in columns_DD]],"25%ile of Distance to Default (DD) Across Time","DD in Years")
plot_chart(descriptive_stats_across_time[["fyear"]+[s + "_25%" for s in columns_PD]],"25%ile of Probability of (PD) Across Time","Probability of Default")
plot_chart(descriptive_stats_across_time[["fyear"]+[s + "_50%" for s in columns_DD]],"50%ile of Distance to Default (DD) Across Time","DD in Years")
plot_chart(descriptive_stats_across_time[["fyear"]+[s + "_50%" for s in columns_PD]],"50%ile of Probability of (PD) Across Time","Probability of Default")
plot_chart(descriptive_stats_across_time[["fyear"]+[s + "_75%" for s in columns_DD]],"75%ile of Distance to Default (DD) Across Time","DD in Years")
plot_chart(descriptive_stats_across_time[["fyear"]+[s + "_75%" for s in columns_PD]],"75%ile of Probability of (PD) Across Time","Probability of Default")

print("Completed Initial Descriptive Plots")
Started Descriptive Stats across time by grouping on time
Completed Initial Descriptive Plots

Loading NBER, BAA-Spread and CFSI Data

In [34]:
##Loading the NBER Data
nber_data = pd.read_csv('/Users/global_minima/Downloads/USREC.csv')
nber_data["DATE"] = pd.to_datetime(nber_data["DATE"])
nber_data = nber_data.rename(columns = {"USREC":"Recession_Flag"})
nber_data["fyear"] = nber_data["DATE"].dt.year
recession_data = nber_data.groupby('fyear')["Recession_Flag"].mean().round().reset_index()

##Loading the BAA-Fed Fund Spread Data
baafa_data = pd.read_csv('/Users/global_minima/Downloads/BAAFFM.csv')
baafa_data["DATE"] = pd.to_datetime(baafa_data["DATE"])
baafa_data = baafa_data.rename(columns = {"BAAFFM":"BAA_Spread_Percentage"})
baafa_data["fyear"] = baafa_data["DATE"].dt.year
baafa_data = baafa_data.groupby('fyear')["BAA_Spread_Percentage"].mean().reset_index()

##Loading the Cleveland Financial Stress Index Data
cfsi_data = pd.read_csv('/Users/global_minima/Downloads/CFSI.csv')
cfsi_data["DATE"] = pd.to_datetime(cfsi_data["DATE"])
cfsi_data = cfsi_data.rename(columns = {"CFSI":"CFSI_Stress_Value"})
cfsi_data["fyear"] = cfsi_data["DATE"].dt.year
cfsi_data = cfsi_data.groupby('fyear')["CFSI_Stress_Value"].mean().reset_index()
print("NBER data is: \n",recession_data.head(10))
print("BAA Spread data is: \n",baafa_data.head(10))
print("Cleveland Financial Stress Index data is: \n",cfsi_data.head(10))
print("Completed reading all the data")
Started Loading the NBER, BAA-Spread and CFSI Data
NBER data is: 
    fyear  Recession_Flag
0   1854             1.0
1   1855             0.0
2   1856             0.0
3   1857             0.0
4   1858             1.0
5   1859             0.0
6   1860             0.0
7   1861             0.0
8   1862             0.0
9   1863             0.0
BAA Spread data is: 
    fyear  BAA_Spread_Percentage
0   1954               2.463333
1   1955               1.745000
2   1956               1.149167
3   1957               1.609167
4   1958               3.154167
5   1959               1.748333
6   1960               1.973333
7   1961               3.120000
8   1962               2.311667
9   1963               1.677500
Cleveland Financial Stress Index data is: 
    fyear  CFSI_Stress_Value
0   1991          -0.330612
1   1992          -0.770000
2   1993          -0.591205
3   1994          -0.670795
4   1995          -0.983315
5   1996          -1.153743
6   1997          -0.820685
7   1998           0.099671
8   1999           0.283644
9   2000           0.305601
Completed reading all the data

Plotting PD and DD across time against Economic Indicators

In [35]:
print("Started Plotting Economic Indicators against the DD and PD for 3 Methods calcualted above")
merged_df = pd.merge(df2,recession_data,on = "fyear", how = "inner")
not_recession_df = merged_df[merged_df["Recession_Flag"]==0]
recession_df = merged_df[merged_df["Recession_Flag"]==1]

not_recession_df = not_recession_df[["fyear"]+desc_stats_columns].groupby(["fyear"]).mean().reset_index()
recession_df = recession_df[["fyear"]+desc_stats_columns].groupby(["fyear"]).mean().reset_index()

plot_chart(not_recession_df[["fyear"]+columns_DD],"Mean of Distance to Default (DD) Across Non Recessionary Period","DD in Years")
plot_chart(recession_df[["fyear"]+columns_DD],"Mean of  Distance to Default (DD) Across Recessionary Period","DD in Years")

##Plot charts for PD
plot_chart(not_recession_df[["fyear"]+columns_PD],"Mean of Probability of Default (PD) Across Non Recessionary Period","Probability of Default")
plot_chart(recession_df[["fyear"]+columns_PD],"Mean of  Probability of Default (PD) Across Recessionary Period","Probability of Default")

merged_baa = pd.merge(df2,baafa_data,on = "fyear", how = "inner")
merged_baa = merged_baa[["fyear","BAA_Spread_Percentage"]+desc_stats_columns].groupby(["fyear"]).mean().reset_index()
plot_chart(merged_baa[["fyear"]+columns_DD+["BAA_Spread_Percentage"]],"Mean of Distance to Default (DD) Across Time VS BAA Spread","DD in Years",True)
plot_chart(merged_baa[["fyear"]+columns_PD+["BAA_Spread_Percentage"]],"Mean of Probability of Default (PD) Across Time VS BAA Spread","Probability of Default",True)

merged_cfsi = pd.merge(df2,cfsi_data,on = "fyear", how = "inner")
merged_cfsi = merged_cfsi[["fyear","CFSI_Stress_Value"]+desc_stats_columns].groupby(["fyear"]).mean().reset_index()
plot_chart(merged_cfsi[["fyear"]+columns_DD+["CFSI_Stress_Value"]],"Mean of Distance to Default (DD) Across Time VS CFSI","DD in Years",True)
plot_chart(merged_cfsi[["fyear"]+columns_PD+["CFSI_Stress_Value"]],"Mean of Probability of Default (PD) Across Time VS CFSI","Probability of Default",True)
print("Code Executed Succesfully!!!!")
Started Plotting Economic Indicators against the DD and PD for 3 Methods calcualted above
Code Executed Succesfully!!!!